Library

rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite)
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'DmIALite' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'DOLite' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'DOLiteTerm' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'HsIALite' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'MmIALite' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'RnIALite' introuvable
library(dplyr)
## 
## Attachement du package : 'dplyr'
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     filter, lag
## Les objets suivants sont masqués depuis 'package:base':
## 
##     intersect, setdiff, setequal, union
library(FactoMineR)
library(factoextra)
## Le chargement a nécessité le package : ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(sva)
## Le chargement a nécessité le package : mgcv
## Le chargement a nécessité le package : nlme
## 
## Attachement du package : 'nlme'
## L'objet suivant est masqué depuis 'package:dplyr':
## 
##     collapse
## This is mgcv 1.9-0. For overview type 'help("mgcv-package")'.
## Le chargement a nécessité le package : genefilter
## Le chargement a nécessité le package : BiocParallel
library(xlsx)
library(clusterProfiler)
## 
## Registered S3 methods overwritten by 'treeio':
##   method              from    
##   MRCA.phylo          tidytree
##   MRCA.treedata       tidytree
##   Nnode.treedata      tidytree
##   Ntip.treedata       tidytree
##   ancestor.phylo      tidytree
##   ancestor.treedata   tidytree
##   child.phylo         tidytree
##   child.treedata      tidytree
##   full_join.phylo     tidytree
##   full_join.treedata  tidytree
##   groupClade.phylo    tidytree
##   groupClade.treedata tidytree
##   groupOTU.phylo      tidytree
##   groupOTU.treedata   tidytree
##   inner_join.phylo    tidytree
##   inner_join.treedata tidytree
##   is.rooted.treedata  tidytree
##   nodeid.phylo        tidytree
##   nodeid.treedata     tidytree
##   nodelab.phylo       tidytree
##   nodelab.treedata    tidytree
##   offspring.phylo     tidytree
##   offspring.treedata  tidytree
##   parent.phylo        tidytree
##   parent.treedata     tidytree
##   root.treedata       tidytree
##   rootnode.phylo      tidytree
##   sibling.phylo       tidytree
## clusterProfiler v4.8.2  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attachement du package : 'clusterProfiler'
## L'objet suivant est masqué depuis 'package:stats':
## 
##     filter
library(pheatmap)
library(rdist)
library(DEP)
library(SummarizedExperiment)
## Le chargement a nécessité le package : MatrixGenerics
## Le chargement a nécessité le package : matrixStats
## 
## Attachement du package : 'matrixStats'
## Les objets suivants sont masqués depuis 'package:genefilter':
## 
##     rowSds, rowVars
## L'objet suivant est masqué depuis 'package:dplyr':
## 
##     count
## 
## Attachement du package : 'MatrixGenerics'
## Les objets suivants sont masqués depuis 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Les objets suivants sont masqués depuis 'package:genefilter':
## 
##     rowSds, rowVars
## Le chargement a nécessité le package : GenomicRanges
## Le chargement a nécessité le package : stats4
## Le chargement a nécessité le package : BiocGenerics
## 
## Attachement du package : 'BiocGenerics'
## Les objets suivants sont masqués depuis 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## Les objets suivants sont masqués depuis 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Le chargement a nécessité le package : S4Vectors
## 
## Attachement du package : 'S4Vectors'
## L'objet suivant est masqué depuis 'package:clusterProfiler':
## 
##     rename
## Les objets suivants sont masqués depuis 'package:dplyr':
## 
##     first, rename
## L'objet suivant est masqué depuis 'package:utils':
## 
##     findMatches
## Les objets suivants sont masqués depuis 'package:base':
## 
##     expand.grid, I, unname
## Le chargement a nécessité le package : IRanges
## 
## Attachement du package : 'IRanges'
## L'objet suivant est masqué depuis 'package:clusterProfiler':
## 
##     slice
## L'objet suivant est masqué depuis 'package:nlme':
## 
##     collapse
## Les objets suivants sont masqués depuis 'package:dplyr':
## 
##     collapse, desc, slice
## Le chargement a nécessité le package : GenomeInfoDb
## Le chargement a nécessité le package : Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attachement du package : 'Biobase'
## L'objet suivant est masqué depuis 'package:MatrixGenerics':
## 
##     rowMedians
## Les objets suivants sont masqués depuis 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(org.Hs.eg.db)
## Le chargement a nécessité le package : AnnotationDbi
## 
## Attachement du package : 'AnnotationDbi'
## L'objet suivant est masqué depuis 'package:clusterProfiler':
## 
##     select
## L'objet suivant est masqué depuis 'package:dplyr':
## 
##     select
## 
library(pathview)
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
## 
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
library(enrichplot)
library(DOSE)
## DOSE v3.26.1  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
library(biomaRt)

deal_with_NA <- function(df, column, value_to_change, final_value){
  for (i in column){
    print(i)
    data.table::set(df,which(df[[i]] == value_to_change), i, final_value)
  }
}

"%ni%" <- Negate("%in%")

Initialisation

Data loading

data.dir <- "/media/alexis/DATA/Proteomic/"

Discovery_Cohort_Proteomic <- read.csv(paste0(data.dir,"Discovery_cohort.csv"))

Discovery_Cohort_Proteomic_unimputed <- read.csv(paste0(data.dir,"Discovery_cohort_unimputed.csv"), check.names = F)

Cell_lines_Proteomic <- read.csv(paste0(data.dir,"Cell_lines.csv"))

CD34_Proteomic <- read.csv(paste0(data.dir,"CD34.csv"), check.names = F)

Validation_cohort_Proteomic <- read.csv(paste0(data.dir,"Validation_cohort.csv"), dec = ",")

Clinicals loading

Discovery_clinicals <- readxl::read_excel("/media/alexis/DATA/Proteomic/1-s2.0-S1535610822000587-mmc7/Data_S1_Discovery_Cohort.xlsx", sheet = "Clinical Characteristics")

Discovery_Genotype <- readxl::read_excel("/media/alexis/DATA/Proteomic/1-s2.0-S1535610822000587-mmc7/Data_S1_Discovery_Cohort.xlsx", sheet = "PanelSeq I")

Discovery_Genotype_II <- readxl::read_excel("/media/alexis/DATA/Proteomic/1-s2.0-S1535610822000587-mmc7/Data_S1_Discovery_Cohort.xlsx", sheet = "PanelSeq II")

Discovery_Genotype_II <- Discovery_Genotype_II[,c(1,3,4,2)]
colnames(Discovery_Genotype_II) <- c("Pat_ID", "Type", "Frequency", "Gene")

Discovery_Genotype <- rbind(Discovery_Genotype, Discovery_Genotype_II)
rm(Discovery_Genotype_II)

Phenotyping

Functions

Make_mutation_subgroups <- function(mutations_of_interest_A, mutations_of_interest_B, mutations_to_skip, genotype = Discovery_Genotype, no_mutated_patients = c("F126", "F86", "F129", "F30")){
  genotype_of_interest_A <- dplyr::filter(genotype, Gene %in% mutations_of_interest_A) %>% .$Pat_ID %>% unique()
  genotype_of_interest_B <- dplyr::filter(genotype, Gene %in% mutations_of_interest_B) %>% .$Pat_ID %>% unique()

  patient_to_skip <- dplyr::filter(genotype, Gene %in% mutations_to_skip) %>% .$Pat_ID %>% unique()
  genotype_control <- genotype$Pat_ID[genotype$Pat_ID %ni% patient_to_skip] %>% unique()
  genotype_of_interest_A <- intersect(genotype_of_interest_A, genotype_control)
  genotype_of_interest_B <- intersect(genotype_of_interest_B, genotype_control)

  genotype_control <- c(genotype_control[genotype_control %ni% genotype_of_interest_A & genotype_control %ni% genotype_of_interest_B], no_mutated_patients)
  
  pheno <- data.frame(Patient_ID = c(genotype_of_interest_A, genotype_of_interest_B, patient_to_skip, genotype_control), 
                      pheno = c(rep(mutations_of_interest_A, length(genotype_of_interest_A)), rep(mutations_of_interest_B, length(genotype_of_interest_B)), rep("Others", length(patient_to_skip)), rep("Group_control", length(genotype_control))))
  return(pheno)  
}
Pheno <- Make_mutation_subgroups("IDH1", "IDH2",c())
rownames(Pheno) <- Pheno$Patient_ID

Pheno <- Pheno[colnames(Discovery_Cohort_Proteomic)[1:177],]
Pheno$pheno %>% table
## .
## Group_control          IDH1          IDH2 
##           139            14            24

Preparing analysis

Making sub-data

data <- Discovery_Cohort_Proteomic_unimputed
data$PG.Genes %>% duplicated() %>% any()
## [1] TRUE
data %>% group_by(PG.Genes) %>% summarize(frequency = n()) %>% arrange(desc(frequency)) %>% filter(frequency > 1)
## # A tibble: 1 × 2
##   PG.Genes frequency
##   <chr>        <int>
## 1 ""              14
data_unique <- make_unique(data, "PG.Genes", "PG.UniProtIds", delim = ";")

data_unique_log_reverted <- sapply(data_unique[,1:177], function(x){
  sapply(x, function(y){
    exp(y)
  })
})

IDHwt_samples <- dplyr::filter(Pheno, pheno == "Group_control") %>% .$Patient_ID
# IDH1

sampling_IDHwt_IDH1 <- (Pheno$pheno == "IDH1") %>% table %>% .[2] %>% sample(IDHwt_samples, ., replace = F)
data_unique_log_reverted_IDH1_IDHwt <- data_unique_log_reverted[,(Pheno$pheno == "IDH1" | Pheno$Patient_ID %in% sampling_IDHwt_IDH1)]
data_unique_log_reverted_IDH1_IDHwt <- cbind(data_unique_log_reverted_IDH1_IDHwt, data_unique[,178:183])
Pheno_IDH1_wt <- Pheno[Pheno$pheno == "IDH1" | Pheno$Patient_ID %in% sampling_IDHwt_IDH1,]
Pheno_IDH1_wt$pheno <- sapply(Pheno_IDH1_wt$pheno, function(sample){
  ifelse(stringr::str_detect(sample, "IDH1"), "mIDH1", "wtIDH")
})
pheno_se <- data.frame("label" = colnames(data)[1:177], condition = Pheno$pheno, replicate = 1:177)
pheno_se_idh1_wt <- pheno_se[pheno_se$condition == "IDH1" | pheno_se$label %in% sampling_IDHwt_IDH1,]
data_se_idh1_wt <- make_se(data_unique_log_reverted_IDH1_IDHwt, 1:nrow(Pheno_IDH1_wt), pheno_se_idh1_wt)
data_se_idh1_wt
## class: SummarizedExperiment 
## dim: 6536 28 
## metadata(0):
## assays(1): ''
## rownames(6536): A1BG A2M ... Q6ZSR9 R4GMS1
## rowData names(6): PG.ProteinAccessions PG.Genes ... name ID
## colnames(28): Group_control_4 Group_control_7 ... IDH1_160
##   Group_control_176
## colData names(4): label ID condition replicate
plot_frequency(data_se_idh1_wt)

# IDH2

sampling_IDHwt_IDH2 <- (Pheno$pheno == "IDH2") %>% table %>% .[2] %>% sample(IDHwt_samples, ., replace = F)
data_unique_log_reverted_IDH2_IDHwt <- data_unique_log_reverted[,(Pheno$pheno == "IDH2" | Pheno$Patient_ID %in% sampling_IDHwt_IDH2)]
data_unique_log_reverted_IDH2_IDHwt <- cbind(data_unique_log_reverted_IDH2_IDHwt, data_unique[,178:183])
Pheno_IDH2_wt <- Pheno[Pheno$pheno == "IDH2" | Pheno$Patient_ID %in% sampling_IDHwt_IDH2,]
Pheno_IDH2_wt$pheno <- sapply(Pheno_IDH2_wt$pheno, function(sample){
  ifelse(stringr::str_detect(sample, "IDH2"), "mIDH2", "wtIDH")
})
pheno_se_idh2_wt <- pheno_se[pheno_se$condition == "IDH2" | pheno_se$label %in% sampling_IDHwt_IDH2,]
data_se_idh2_wt <- make_se(data_unique_log_reverted_IDH2_IDHwt, 1:nrow(Pheno_IDH2_wt), pheno_se_idh2_wt)
data_se_idh2_wt
## class: SummarizedExperiment 
## dim: 6536 48 
## metadata(0):
## assays(1): ''
## rownames(6536): A1BG A2M ... Q6ZSR9 R4GMS1
## rowData names(6): PG.ProteinAccessions PG.Genes ... name ID
## colnames(48): IDH2_2 Group_control_3 ... Group_control_170 IDH2_175
## colData names(4): label ID condition replicate
plot_frequency(data_se_idh2_wt)

# IDH1 vs IDH2

data_unique_log_reverted_IDH1_IDH2 <- data_unique_log_reverted[,Pheno$pheno == "IDH1" | Pheno$pheno == "IDH2"]
data_unique_log_reverted_IDH1_IDH2 <- cbind(data_unique_log_reverted_IDH1_IDH2, data_unique[,178:183])
Pheno_IDH <- Pheno[Pheno$pheno == "IDH1" | Pheno$pheno == "IDH2",]
pheno_se_IDH <- pheno_se[pheno_se$condition == "IDH1" | pheno_se$condition == "IDH2",]
data_se_IDH <- make_se(data_unique_log_reverted_IDH1_IDH2, 1:nrow(Pheno_IDH), pheno_se_IDH)
data_se_IDH
## class: SummarizedExperiment 
## dim: 6536 38 
## metadata(0):
## assays(1): ''
## rownames(6536): A1BG A2M ... Q6ZSR9 R4GMS1
## rowData names(6): PG.ProteinAccessions PG.Genes ... name ID
## colnames(38): IDH2_2 IDH1_13 ... IDH2_163 IDH2_175
## colData names(4): label ID condition replicate
plot_frequency(data_se_IDH)

Looking at missing values

data_filt_IDH1 <- filter_missval(data_se_idh1_wt, thr = 0)
data_filt_IDH2 <- filter_missval(data_se_idh2_wt, thr = 0)
data_filt_IDH <- filter_missval(data_se_IDH, thr = 0)

plot_numbers(data_filt_IDH1)

plot_numbers(data_filt_IDH2)

plot_numbers(data_filt_IDH)

plot_coverage(data_filt_IDH1)

plot_coverage(data_filt_IDH2)

plot_coverage(data_filt_IDH)

Normalisation

for (i in colnames(data_filt_IDH1@assays@data@listData[[1]])){
    data_filt_IDH1@assays@data@listData[[1]][,i][is.nan(data_filt_IDH1@assays@data@listData[[1]][,i])]<-NA
}

for (i in colnames(data_filt_IDH2@assays@data@listData[[1]])){
    data_filt_IDH2@assays@data@listData[[1]][,i][is.nan(data_filt_IDH2@assays@data@listData[[1]][,i])]<-NA
}
for (i in colnames(data_filt_IDH@assays@data@listData[[1]])){
    data_filt_IDH@assays@data@listData[[1]][,i][is.nan(data_filt_IDH@assays@data@listData[[1]][,i])]<-NA
}

data_norm_IDH1 <- normalize_vsn(data_filt_IDH1)
data_norm_IDH2 <- normalize_vsn(data_filt_IDH2)
data_norm_IDH <- normalize_vsn(data_filt_IDH)

plot_normalization(data_filt_IDH1[,1:5], data_norm_IDH1[,1:5])

plot_normalization(data_filt_IDH2[,1:5], data_norm_IDH2[,1:5])

plot_normalization(data_filt_IDH[,1:5], data_norm_IDH[,1:5])

plot_missval(data_filt_IDH1[, 1:10])

plot_missval(data_filt_IDH2[, 1:10])

plot_missval(data_filt_IDH[, 1:10])

plot_detect(data_filt_IDH1)

plot_detect(data_filt_IDH2)

plot_detect(data_filt_IDH)

Imputation of missing values

data_imp_IDH1 <- impute(data_norm_IDH1, fun = "MinProb", q = 0.01)
## Imputing along margin 2 (samples/columns).
## [1] 0.9486871
data_imp_IDH2 <- impute(data_norm_IDH2, fun = "MinProb", q = 0.01)
## Imputing along margin 2 (samples/columns).
## [1] 0.9053707
data_imp_IDH <- impute(data_norm_IDH, fun = "MinProb", q = 0.01)
## Imputing along margin 2 (samples/columns).
## [1] 0.8914037
data_imp_IDH1@assays@data@listData[[1]] %>%
  write.table("~/GitHub/Thesis_paper/Datasets/Proteomic/mIDH1_data.tsv", 
              sep = "\t", row.names = T, col.names = NA, quote = F)
data_imp_IDH2@assays@data@listData[[1]] %>%
  write.table("~/GitHub/Thesis_paper/Datasets/Proteomic/mIDH2_data.tsv", 
              sep = "\t", row.names = T, col.names = NA, quote = F)
data_imp_IDH@assays@data@listData[[1]] %>%
  write.table("~/GitHub/Thesis_paper/Datasets/Proteomic/mIDH_data.tsv", 
              sep = "\t", row.names = T, col.names = NA, quote = F)

plot_imputation(data_norm_IDH1, data_imp_IDH1)

plot_imputation(data_norm_IDH2, data_imp_IDH2)

plot_imputation(data_norm_IDH, data_imp_IDH)

Comparison of pheno analysis

DEG

data_diff_IDH1 <- test_diff(data_imp_IDH1, type = "control", control = "Group_control")
## Tested contrasts: IDH1_vs_Group_control
data_diff_IDH2 <- test_diff(data_imp_IDH2, type = "control", control = "Group_control")
## Tested contrasts: IDH2_vs_Group_control
data_diff_IDH <- test_diff(data_imp_IDH, type = "control", control = "IDH1")
## Tested contrasts: IDH2_vs_IDH1
dep_IDH1 <- add_rejections(data_diff_IDH1, alpha = 0.05, lfc = 0.5)
dep_IDH2 <- add_rejections(data_diff_IDH2, alpha = 0.05, lfc = 0.5)
dep_IDH <- add_rejections(data_diff_IDH, alpha = 0.05, lfc = 0.5)

PCA

plot_pca(dep_IDH1, n = 500, point_size = 4, label = F, indicate = "condition")
## Warning: Use of `pca_df[[indicate[1]]]` is discouraged.
## ℹ Use `.data[[indicate[1]]]` instead.

plot_pca(dep_IDH2, n = 500, point_size = 4, label = F, indicate = "condition")
## Warning: Use of `pca_df[[indicate[1]]]` is discouraged.
## ℹ Use `.data[[indicate[1]]]` instead.

plot_pca(dep_IDH, n = 500, point_size = 4, label = F, indicate = "condition")
## Warning: Use of `pca_df[[indicate[1]]]` is discouraged.
## ℹ Use `.data[[indicate[1]]]` instead.

Heatmap

plot_cor(dep_IDH1, significant = FALSE, lower = 0.75, upper = 1, pal = "Reds")

plot_cor(dep_IDH2, significant = FALSE, lower = 0.75, upper = 1, pal = "Reds")

plot_cor(dep_IDH, significant = FALSE, lower = 0.75, upper = 1, pal = "Reds")

plot_heatmap(dep_IDH1, type = "centered", kmeans = FALSE, 
             k = 2, show_row_names = FALSE,
             indicate = "condition")

plot_heatmap(dep_IDH2, type = "centered", kmeans = FALSE, 
             k = 2, show_row_names = FALSE,
             indicate = "condition")

plot_heatmap(dep_IDH, type = "centered", kmeans = FALSE, 
             k = 2, show_row_names = FALSE,
             indicate = "condition")

Volcano

plot_volcano(dep_IDH1, contrast = "IDH1_vs_Group_control", label_size = 5, add_names = TRUE, adjusted = F)

plot_volcano(dep_IDH2, contrast = "IDH2_vs_Group_control", label_size = 5, add_names = TRUE, adjusted = F)

plot_volcano(dep_IDH, contrast = "IDH2_vs_IDH1", label_size = 5, add_names = TRUE, adjusted = F)

plot_single(dep_IDH1, proteins = "CEBPA", type = "centered") + theme(legend.position = "none")

#plot_single(dep_IDH2, proteins = "CEBPZ", type = "centered") + theme(legend.position = "none")
plot_single(dep_IDH, proteins = "CEBPA", type = "centered") + theme(legend.position = "none")

plot_single(dep_IDH1, proteins = "HDGFRP2", type = "centered") + theme(legend.position = "none")

plot_single(dep_IDH2, proteins = "HDGFRP2", type = "centered") + theme(legend.position = "none")

plot_single(dep_IDH, proteins = "HDGFRP2", type = "centered") + theme(legend.position = "none")

df_long_IDH1 <- get_df_long(dep_IDH1)
df_long_IDH2 <- get_df_long(dep_IDH2)
df_long_IDH <- get_df_long(dep_IDH)

Saving DEG

Diff_Prot_exp_IDH1 <- dep_IDH1@elementMetadata@listData %>% 
  as.data.frame() %>%
  dplyr::select(c("name", "IDH1_vs_Group_control_diff", "IDH1_vs_Group_control_p.val"))

Diff_Prot_exp_IDH2 <- dep_IDH2@elementMetadata@listData %>% 
  as.data.frame() %>%
  dplyr::select(c("name", "IDH2_vs_Group_control_diff", "IDH2_vs_Group_control_p.val"))

Diff_Prot_exp_IDH <- dep_IDH@elementMetadata@listData %>% 
  as.data.frame() %>%
  dplyr::select(c("name", "IDH2_vs_IDH1_diff", "IDH2_vs_IDH1_p.val"))

write.table(Diff_Prot_exp_IDH1, "~/GitHub/Thesis_paper/Results/Proteo/Diff_Prot_exp_IDH1_IDHwt.tsv", quote = F, row.names = F, sep = "\t")
Diff_Prot_exp_IDH1_sig <- Diff_Prot_exp_IDH1 %>% 
  dplyr::filter(IDH1_vs_Group_control_p.val < 0.05 & abs(IDH1_vs_Group_control_diff) > 0.5)
  
Diff_Prot_exp_IDH1_sig %>%
  write.table("~/GitHub/Thesis_paper/Results/Proteo/Diff_sig_Prot_exp_IDH1_IDHwt.tsv", quote = F, row.names = F, sep = "\t")




write.table(Diff_Prot_exp_IDH2, "~/GitHub/Thesis_paper/Results/Proteo/Diff_Prot_exp_IDH2_IDHwt.tsv", quote = F, row.names = F, sep = "\t")
Diff_Prot_exp_IDH2_sig <- Diff_Prot_exp_IDH2 %>% 
  dplyr::filter(IDH2_vs_Group_control_p.val < 0.05 & abs(IDH2_vs_Group_control_diff) > 0.5) 
Diff_Prot_exp_IDH2_sig %>%
  write.table("~/GitHub/Thesis_paper/Results/Proteo/Diff_sig_Prot_exp_IDH2_IDHwt.tsv", quote = F, row.names = F, sep = "\t")




write.table(Diff_Prot_exp_IDH, "~/GitHub/Thesis_paper/Results/Proteo/Diff_Prot_exp_IDH2_IDH1.tsv", quote = F, row.names = F, sep = "\t")
Diff_Prot_exp_IDH_sig <- Diff_Prot_exp_IDH %>% 
  dplyr::filter(IDH2_vs_IDH1_p.val < 0.05 & abs(IDH2_vs_IDH1_diff) > 0.5) 
Diff_Prot_exp_IDH_sig %>% 
  write.table("~/GitHub/Thesis_paper/Results/Proteo/Diff_sig_Prot_exp_IDH2_IDH1.tsv", quote = F, row.names = F, sep = "\t")


Diff_Prot_exp_IDH1 %>% 
  dplyr::filter(IDH1_vs_Group_control_p.val < 0.05 & abs(IDH1_vs_Group_control_diff) > 0.5) %>%
  .$IDH1_vs_Group_control_diff %>% hist(breaks = 20)

Diff_Prot_exp_IDH2 %>% 
  dplyr::filter(IDH2_vs_Group_control_p.val < 0.05 & abs(IDH2_vs_Group_control_diff) > 0.5) %>%
  .$IDH2_vs_Group_control_diff %>% hist(breaks = 20)

Diff_Prot_exp_IDH %>% 
  dplyr::filter(IDH2_vs_IDH1_p.val < 0.05 & abs(IDH2_vs_IDH1_diff) > 0.5) %>%
  .$IDH2_vs_IDH1_diff %>% hist(breaks = 20)

GO KEGG WP Enrichments

Functions

GO_KEGG_WP_MKEGG <- function(List_positive, List_negative){
  list_enrichments <- list()
  list_enrichments[["GO_positive"]] <- enrichGO(List_positive, OrgDb = org.Hs.eg.db, universe = GO_universe, keyType = "SYMBOL", pvalueCutoff = 0.2)
  list_enrichments[["GO_negative"]] <- enrichGO(List_negative, OrgDb = org.Hs.eg.db, universe = GO_universe, keyType = "SYMBOL", pvalueCutoff = 0.2)
  
  entrez_id_positive <- dplyr::filter(genes, hgnc_symbol %in% List_positive) %>%
    .$entrezgene_id %>% unique
  entrez_id_negative <- dplyr::filter(genes, hgnc_symbol %in% List_negative) %>%
    .$entrezgene_id %>% unique
  universe_entrez <- dplyr::filter(genes, hgnc_symbol %in% GO_universe) %>%
    .$entrezgene_id %>% unique
  
  list_enrichments[["KEGG_positive"]] <- enrichKEGG(entrez_id_positive, organism = "hsa", 
                                                  keyType = 'ncbi-geneid', pvalueCutoff = 0.1, 
                                                  pAdjustMethod = "none", universe = universe_entrez)
  list_enrichments[["KEGG_negative"]] <- enrichKEGG(entrez_id_negative, organism = "hsa", 
                                                    keyType = 'ncbi-geneid', pvalueCutoff = 0.1, 
                                                    pAdjustMethod = "none", universe = universe_entrez)
  
  list_enrichments[["MKEGG_positive"]] <- enrichMKEGG(entrez_id_positive, organism = "hsa", 
                                                    keyType = 'ncbi-geneid', pvalueCutoff = 0.1, 
                                                    pAdjustMethod = "none", universe = universe_entrez)
  list_enrichments[["MKEGG_negative"]] <- enrichMKEGG(entrez_id_negative, organism = "hsa", 
                                                      keyType = 'ncbi-geneid', pvalueCutoff = 0.1, 
                                                      pAdjustMethod = "none", universe = universe_entrez)  
  
  list_enrichments[["WP_positive"]] <- enrichWP(entrez_id_positive, organism = "Homo sapiens", 
                                              universe = universe_entrez)
  list_enrichments[["WP_negative"]] <- enrichWP(entrez_id_negative, organism = "Homo sapiens", 
                                                universe = universe_entrez)
  
  res <- lapply(names(list_enrichments), function(onto){
    list_enrichments[[onto]]@result$qvalue <- list_enrichments[[onto]]@result$pvalue
    list_enrichments[[onto]]@result$p.adjust <- list_enrichments[[onto]]@result$pvalue
    list_enrichments[[onto]]
  })
  
  names(res) <- names(list_enrichments)
  
  return(res)
}

Make_enrich_plot <- function(enrichment, path){
  n_enrich <- nrow(dplyr::filter(enrichment@result, pvalue < 0.1))
  if(n_enrich == 0){
    return(NULL)
  }
  height_plot <- 200 + (n_enrich *120)
  p <- dotplot(enrichment, showCategory = n_enrich)
  ggsave(path, p, bg = "white", width = 3800, height = height_plot, units = "px", limitsize = FALSE)
  p
}
mart <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                         dataset = "hsapiens_gene_ensembl")

GO_universe <- Diff_Prot_exp_IDH1$name %>% unique

genes <- getBM(filters = "hgnc_symbol",
               attributes = c("ensembl_gene_id","entrezgene_id", "hgnc_symbol"),
               values = GO_universe, 
               mart = mart)

Diff prot

hs <- org.Hs.eg.db

DEProts_IDH1_up <- Diff_Prot_exp_IDH1 %>% 
  dplyr::filter(IDH1_vs_Group_control_p.val < 0.05 & IDH1_vs_Group_control_diff > 0.5) %>% 
  .$name

DEProts_IDH2_up <- Diff_Prot_exp_IDH2 %>% 
  dplyr::filter(IDH2_vs_Group_control_p.val < 0.05 & IDH2_vs_Group_control_diff > 0.5) %>% 
  .$name

DEProts_IDH_up <- Diff_Prot_exp_IDH %>% 
  dplyr::filter(IDH2_vs_IDH1_p.val < 0.05 & IDH2_vs_IDH1_diff > 0.5) %>% 
  .$name

DEProts_IDH1_down <- Diff_Prot_exp_IDH1 %>% 
  dplyr::filter(IDH1_vs_Group_control_p.val < 0.05 & IDH1_vs_Group_control_diff < -0.5) %>% 
  .$name

DEProts_IDH2_down <- Diff_Prot_exp_IDH2 %>% 
  dplyr::filter(IDH2_vs_Group_control_p.val < 0.05 & IDH2_vs_Group_control_diff < -0.5) %>% 
  .$name

DEProts_IDH_down <- Diff_Prot_exp_IDH %>% 
  dplyr::filter(IDH2_vs_IDH1_p.val < 0.05 & IDH2_vs_IDH1_diff < -0.5) %>% 
  .$name

GO KEGG WP

IDH1_GO_KEGG_WP_vs_Control <- GO_KEGG_WP_MKEGG(DEProts_IDH1_up, DEProts_IDH1_down)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
IDH2_GO_KEGG_WP_vs_Control <- GO_KEGG_WP_MKEGG(DEProts_IDH2_up, DEProts_IDH2_down)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
IDH2_GO_KEGG_WP_vs_IDH1 <- GO_KEGG_WP_MKEGG(DEProts_IDH_up, DEProts_IDH_down)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...

Saving plots

lapply(names(IDH1_GO_KEGG_WP_vs_Control), function(enrichment){
  Make_enrich_plot(IDH1_GO_KEGG_WP_vs_Control[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/IDH1_vs_IDHwt_", enrichment, ".png"))
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]
## NULL
## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

lapply(names(IDH2_GO_KEGG_WP_vs_Control), function(enrichment){
  Make_enrich_plot(IDH2_GO_KEGG_WP_vs_Control[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/IDH2_vs_IDHwt_", enrichment, ".png"))
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

lapply(names(IDH2_GO_KEGG_WP_vs_IDH1), function(enrichment){
  Make_enrich_plot(IDH2_GO_KEGG_WP_vs_IDH1[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/IDH1_vs_IDH2_", enrichment, ".png"))
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

Saving tables

lapply(names(IDH1_GO_KEGG_WP_vs_Control), function(enrichment){
  IDH1_GO_KEGG_WP_vs_Control[[enrichment]]@result %>% .[c(1:5, 8:9)] %>% 
    write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/IDH1_vs_Control_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
})
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
lapply(names(IDH2_GO_KEGG_WP_vs_Control), function(enrichment){
  IDH2_GO_KEGG_WP_vs_Control[[enrichment]]@result %>% .[c(1:5, 8:9)] %>% 
    write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/IDH2_vs_Control_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
})
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
lapply(names(IDH2_GO_KEGG_WP_vs_IDH1), function(enrichment){
  IDH2_GO_KEGG_WP_vs_IDH1[[enrichment]]@result %>% .[c(1:5, 8:9)] %>% 
    write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/IDH2_vs_IDH1_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
})
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL

GSEA analysis

Functions

Do_GSEA_analysis <- function(Diff_prot_data){
  diff_prot <- Diff_prot_data[3] < 0.05
  deprots <- Diff_prot_data[3][diff_prot]
  names(deprots) <- Diff_prot_data$name[diff_prot]
  deprots <- sort(deprots, decreasing = TRUE)

  deprot_entrez <- deprots[names(deprots) %in% genes$hgnc_symbol]
  names(deprot_entrez) <- sapply(names(deprot_entrez), function(hgnc_name){
    dplyr::filter(genes, hgnc_symbol == hgnc_name) %>% .$entrezgene_id %>% unlist %>% unique
  })
  
  list_enrichments <- list()
  
  list_enrichments[["KEGG"]] <- gseKEGG(deprot_entrez, minGSSize = 2, pvalueCutoff = 0.1, pAdjustMethod = "none", verbose = T)
  list_enrichments[["MKEGG"]] <- gseMKEGG(deprot_entrez, minGSSize = 2, pvalueCutoff = 0.1, pAdjustMethod = "none", verbose = T)
  list_enrichments[["WP"]] <- gseWP(deprot_entrez, organism = "Homo sapiens", minGSSize = 2, pvalueCutoff = 0.1, pAdjustMethod = "none", verbose = T)
                   
  res <- lapply(names(list_enrichments), function(onto){
    list_enrichments[[onto]]@result$qvalue <- list_enrichments[[onto]]@result$pvalue
    list_enrichments[[onto]]@result$p.adjust <- list_enrichments[[onto]]@result$pvalue
    list_enrichments[[onto]]
  })
  
  names(res) <- names(list_enrichments)
  res
}

KEGG MKEGG WP GSEA

IDH1_wt_GSEA <- Do_GSEA_analysis(Diff_Prot_exp_IDH1)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## no term enriched under specific pvalueCutoff...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
IDH2_wt_GSEA <- Do_GSEA_analysis(Diff_Prot_exp_IDH2)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## no term enriched under specific pvalueCutoff...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
IDH2_IDH1_GSEA <- Do_GSEA_analysis(Diff_Prot_exp_IDH)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## no term enriched under specific pvalueCutoff...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...

Saving plots

lapply(names(IDH1_wt_GSEA), function(enrichment){
  Make_enrich_plot(IDH1_wt_GSEA[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/GSEA/GSEA_IDH1_vs_IDHwt_", enrichment, ".png"))
})
## [[1]]

## 
## [[2]]
## NULL
## 
## [[3]]

lapply(names(IDH2_wt_GSEA), function(enrichment){
  Make_enrich_plot(IDH2_wt_GSEA[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/GSEA/GSEA_IDH2_vs_IDHwt_", enrichment, ".png"))
})
## [[1]]

## 
## [[2]]
## NULL
## 
## [[3]]

lapply(names(IDH2_IDH1_GSEA), function(enrichment){
  Make_enrich_plot(IDH2_IDH1_GSEA[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/GSEA/GSEA_IDH2_vs_IDH1_", enrichment, ".png"))
})
## [[1]]

## 
## [[2]]
## NULL
## 
## [[3]]

Saving tables

lapply(names(IDH1_wt_GSEA), function(enrichment){
  if(nrow(IDH1_wt_GSEA[[enrichment]]@result) > 0){
    IDH1_wt_GSEA[[enrichment]]@result %>% .[c(1:6, 9:11)] %>% 
      write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/GSEA/IDH1_vs_Control_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
  }
})
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
lapply(names(IDH2_wt_GSEA), function(enrichment){
  if(nrow(IDH2_wt_GSEA[[enrichment]]@result) > 0){
    IDH2_wt_GSEA[[enrichment]]@result %>% .[c(1:6, 9:11)] %>% 
      write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/GSEA/IDH2_vs_Control_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
  }
})
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
lapply(names(IDH2_IDH1_GSEA), function(enrichment){
  if(nrow(IDH2_IDH1_GSEA[[enrichment]]@result) > 0){
    IDH2_IDH1_GSEA[[enrichment]]@result %>% .[c(1:6, 9:11)] %>% 
      write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/GSEA/IDH2_vs_IDH1_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
  }
})
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL

Make fppi network

Functions

Make_fPPI_specific_network <- function(Diff_data_sig){
  dplyr::filter(fPPI, Gene1 %in% Diff_data_sig$name | Gene2 %in% Diff_data_sig$name)
}

Making Network

fPPI <- read.table("~/Documents/List_genes_from_transcriptome/FIsInGene_020720_with_annotations.tsv", sep = "\t", header = T)

fPPI_IDH1_wt <- dplyr::filter(fPPI, Gene1 %in% Diff_Prot_exp_IDH1_sig$name | Gene2 %in% Diff_Prot_exp_IDH1_sig$name)
fPPI_IDH2_wt <- dplyr::filter(fPPI, Gene1 %in% Diff_Prot_exp_IDH2_sig$name | Gene2 %in% Diff_Prot_exp_IDH2_sig$name)
fPPI_IDH2_IDH1 <- dplyr::filter(fPPI, Gene1 %in% Diff_Prot_exp_IDH_sig$name | Gene2 %in% Diff_Prot_exp_IDH_sig$name)

Saving network

fPPI_IDH1_wt %>%
  write.table("~/GitHub/Thesis_paper/Results/Proteo/Cytoscape/fPPI_IDH1_wt.tsv", sep = "\t", quote = F, col.names = NA)
fPPI_IDH2_wt %>%
  write.table("~/GitHub/Thesis_paper/Results/Proteo/Cytoscape/fPPI_IDH2_wt.tsv", sep = "\t", quote = F, col.names = NA)
fPPI_IDH2_IDH1 %>%
  write.table("~/GitHub/Thesis_paper/Results/Proteo/Cytoscape/fPPI_IDH2_IDH1.tsv", sep = "\t", quote = F, col.names = NA)

Comparing Networks

Functions

Mergeing_networks <- function(Network_A, Network_B, Main_Network, Network_C = NULL, Network_D=NULL){
  nodes_network_A <- paste(Network_A$Gene1, Network_A$Gene2, sep = "_") %>% unique
  nodes_network_B <- paste(Network_B$Gene1, Network_B$Gene2, sep = "_") %>% unique
  if(!is.null(Network_C)){
    nodes_network_C <- paste(Network_C$Gene1, Network_C$Gene2, sep = "_") %>% unique
    nodes_network_B <- intersect(nodes_network_B, nodes_network_C)
  }
  if(!is.null(Network_D)){
    nodes_network_D <- paste(Network_D$Gene1, Network_D$Gene2, sep = "_") %>% unique
    nodes_network_B <- intersect(nodes_network_B, nodes_network_D)
  }
  common_nodes <- intersect(nodes_network_A, nodes_network_B)
  Main_Network$edge <- paste(Main_Network$Gene1, Main_Network$Gene2, sep = "_")
  
  dplyr::filter(Main_Network, edge %in% common_nodes)
}

Distinct_networks <- function(Network_A, Network_B, Main_Network){
  edges_network_A <- paste(Network_A$Gene1, Network_A$Gene2, sep = "_") %>% unique
  edges_network_B <- paste(Network_B$Gene1, Network_B$Gene2, sep = "_") %>% unique
  
  specific_A <- edges_network_A[edges_network_A %ni% edges_network_B]
  specific_B <- edges_network_B[edges_network_B %ni% edges_network_A]

  Main_Network$edge <- paste(Main_Network$Gene1, Main_Network$Gene2, sep = "_")
  
  specific_network_A <- Main_Network[Main_Network$edge %in% specific_A,]
  specific_network_B <- Main_Network[Main_Network$edge %in% specific_B,]
  
  list("Network_A" = specific_network_A,
       "Network_B" = specific_network_B)
}

Mergeing

IDH1_IDH2_common <- Mergeing_networks(fPPI_IDH1_wt, fPPI_IDH2_wt, fPPI)
IDH1_IDH2_specifics <- Distinct_networks(fPPI_IDH1_wt, fPPI_IDH2_wt, fPPI)

Saving merged

IDH1_IDH2_common %>% write.table("~/GitHub/Thesis_paper/Results/Proteo/Cytoscape/IDH1_IDH2_Common.tsv", 
                                 sep="\t", quote = F, col.names = NA)
IDH1_IDH2_specifics$Network_A %>% write.table("~/GitHub/Thesis_paper/Results/Proteo/Cytoscape/IDH1_specific.tsv", 
                                 sep="\t", quote = F, col.names = NA)
IDH1_IDH2_specifics$Network_B %>% write.table("~/GitHub/Thesis_paper/Results/Proteo/Cytoscape/IDH2_specific.tsv", 
                                 sep="\t", quote = F, col.names = NA)
save.image("/media/alexis/DATA/Session/R_session/Thesis_paper_Rsessions/Proteomic/Proteo_diff_enrich.RData")